##
## DescriptiveStatsAndGraphs.R
##   descriptive statistics and plots, and creation of probabilities and their sample variances
##   for Chiappori-Salanie-Weiss (AER), "Partner choice..."
##


rm(list = ls())

require(lattice)

##########  auxiliary functions   ####################################################

## print interesting quantiles
pQuantiles <- function(x) {
  dx <- deparse(substitute(x))
  cat("Interesting quantiles of", dx, ":\n")
  print(quantile(x, c(1, 5, 10, 25, 50, 75, 90, 95, 99) / 100))
}


## print line(s) with stars

pstarline <- function(nl = 1, str = '') {
  cat("\n")
  for (i in 1:nl) {
    cat("     ********************************************* \n")
  }
  if (str != '') {
    cat(str)
    pstarline()
  }
  cat("\n")
}


######################################################################################



datadir <- "CSW-17-aer/Dataset/"
outdir <- "output_data/"

sinkit <- T     ## do we log the program results

## select 1 observation in
nselect <- 1

## we will drop everyone who is observed older than oldlim
oldlim <- 100

## we will drop couples with ages at marriage  smaller than husbminmarrage or wifeminmarrage
husbminmarrage <- 16
wifeminmarrage <- 16

## for singles, only keep the never married
onlyNeverMarried <- T

datacouples <- "CSWcouples0814.txt"
datasingles <- "CSWsingles0814.txt"

## I= education of man, J= education of woman
## year= of survey ; hhwt=household sampling weight

## for couples: year, hhwt, husbrace, wiferace, I, J, age-man, age-woman, year-marriage
bigdcouples <-
  read.table(file = paste(datadir, datacouples, sep = ''),
             header = F)
names(bigdcouples) <-
  c(
    "YearSurvey",
    "HHweight",
    "RaceHusband",
    "RaceWife",
    "EducHusband",
    "EducWife",
    "AgeHusband",
    "AgeWife",
    "YearMarried"
  )
##    and we add marst=1
bigdcouples$MarriageStatus <- 1
varNames <- names(bigdcouples)

## we group education into 2 categories
#the education of the individual (1=HSD, 2=HSG, 3=SC, 4=CG and CG+)
bigdcouples$EducHusband[bigdcouples$EducHusband %in% c(4,5)] <- 4
bigdcouples$EducWife[bigdcouples$EducWife%in% c(4,5)] <- 4


## for singles: year, hhwt, race, educ, age, sex (male 1, female 2), marst
bigdsingles <-
  read.table(file = paste(datadir, datasingles, sep = ''),
             header = F)
names(bigdsingles) <- c("YearSurvey",
                        "HHweight",
                        "Race",
                        "Education",
                        "Age",
                        "Gender",
                        "MarriageStatus")

bigdsingles$Education[bigdsingles$Education %in% c(4,5)] <- 4

if (onlyNeverMarried) {
  cat("\nDropping singles who ever married:\n")
  with(subset(bigdsingles, Gender == 2), sum(MarriageStatus == 6))
  with(subset(bigdsingles, Gender == 1), sum(MarriageStatus == 6))
  bigdsingles <- bigdsingles[bigdsingles$MarriageStatus == 6, ]
}




if (nselect > 1) {
  ncouples <- NROW(bigdcouples)
  bigdcouples <-
    bigdcouples[sample(ncouples, floor(ncouples / nselect)), ]
  nsingles <- NROW(bigdsingles)
  bigdsingles <-
    bigdsingles[sample(nsingles, floor(nsingles / nselect)), ]
  pstarline()
  cat("Selected one obs in", nselect, "\n")
  pstarline()
}

## groups
ng <- 4
namesgroups <-  c(
  "High School Dropout",
  "High School Graduate",
  "Some College",
  "College Graduate or Postgraduate"
)

# namesgroups <-  c(
#   "High School Dropout",
#   "High School Graduate",
#   "Some College",
#   "College Graduate",
#   "Postgraduate"
# )

smallnamesgroups <-  c("HSD", "HSG", "SC", "CG")

## keep only whites (1), or blacks (3)
codeRace <- c(1,3)
ageLim <- c(40,45)
racenamev <- c("Whites",  "Blacks")
raceadjv <- c("white", "black")
raceadjvs <- c("whites", "blacks")

for (irace in 1:2) {
  selectRace <- codeRace[irace]
  younglim <- ageLim[irace]
  ## we will drop everyone who is observed younger than younglim
  
  reclassOld <- 1
  
  ##  if reClassOld=1, we will reclassify couples as never married couples
  ##     with ages at marriage larger than husbmaxmarrage or wifemaxmarrage
  ##
  husbmaxmarrage <-  younglim
  wifemaxmarrage <- younglim
  
  
  racename <- racenamev[irace]
  raceadj <- raceadjv[irace]
  raceadjs <- raceadjvs[irace]
  
  pstarline()
  cat("Working on", racename, "; reclass=",  reclassOld,
      "; younglim=", younglim, "\n")
  pstarline()
  
  resdir <- paste0(outdir, racename, ng, "/")
  
  sinkfile <-
    paste0(outdir, "descstatsgraphs", racename, younglim, ".txt")
  if (sinkit) {
    sink(file = sinkfile)
  }
  
  
  dcouples <- bigdcouples
  
  dsingles <- bigdsingles
  
  ## select races
  if (selectRace > 0) {
    racehusb <- dcouples$RaceHusband
    racewife <- dcouples$RaceWife
    racesingle <- dsingles$Race
    
    pstarline()
    cat("Keeping ", raceadjs, "only\n")
    pstarline()
    dsingles <- dsingles[racesingle == selectRace, ]
    dcouples <-
      dcouples[(racewife == selectRace & racehusb == selectRace), ]
  }
  
  
  ## we drop marriages that occur within the year of the survey
  yearsurvey  <- dcouples$YearSurvey
  yearmarr <- dcouples$YearMarried
  cat("\nDropping",  sum(yearsurvey <= yearmarr),  "marriages  (",
      100 * mean(yearsurvey <= yearmarr),
      "%) that occurred in the year of the survey.\n")
  dcouples <- dcouples[(yearsurvey > yearmarr), ]
  
  ## for couples: ages at marriage
  yearsurvey  <- dcouples$YearSurvey
  yearmarr <- dcouples$YearMarried
  agehusb <- dcouples$AgeHusband
  agewife <- dcouples$AgeWife
  agemarrhusb <- agehusb - (yearsurvey - yearmarr)
  agemarrwife <- agewife - (yearsurvey - yearmarr)
  
  ## construct data
  
  ## select on current ages
  
  selagesinterval <-
    ((agehusb >= younglim) & (agehusb <= oldlim) &
       (agewife >= younglim) &
       (agewife <= oldlim))
  dcouples <- dcouples[selagesinterval, ]
  pstarline()
  cat("Dropped ", sum(!selagesinterval), " couples (",  100 * mean(!selagesinterval),
      "% of sample) with a partner younger than ",  younglim,
      " or older than ", oldlim, ".\n")
  pstarline()
  
  ## select on ages at marriage
  yearsurvey  <- dcouples$YearSurvey
  yearmarr <- dcouples$YearMarried
  agemarrhusb <- dcouples$AgeHusband - (yearsurvey - yearmarr)
  agemarrwife <- dcouples$AgeWife - (yearsurvey - yearmarr)
  selagesminmarr <-
    (agemarrhusb >= husbminmarrage &  agemarrwife >= wifeminmarrage)
  dcouples <- dcouples[selagesminmarr, ]
  pstarline()
  cat("Dropped ", sum(!selagesminmarr),  " couples (",
      100 * mean(!selagesminmarr),  "% of sample) with a partner below minimum age.\n")
  cat("   (minimum ages=", husbminmarrage, "for men and ",  wifeminmarrage, "for women.)\n")
  pstarline()
  
  
  if (reclassOld == 1) {
    yearsurvey  <- dcouples$YearSurvey
    yearmarr <- dcouples$YearMarried
    agemarrhusb <- dcouples$AgeHusband - (yearsurvey - yearmarr)
    agemarrwife <- dcouples$AgeWife - (yearsurvey - yearmarr)
    
    selagesmaxmarr <-
      ((agemarrhusb <= husbmaxmarrage) &
         (agemarrwife <= wifemaxmarrage))
    reclasscouples <- dcouples[(!selagesmaxmarr), ]
    nreclass <- NROW(reclasscouples)
    ## couples with one partner outside of the marriage age window:
    ##                   we reclassify both partners as single
    ## for couples: year, hhwt, husbrace, wiferace, I, J, age-man, age-woman,
    ##    year-marriage, marst
    singlemenreclass <-
      data.frame(
        reclasscouples$YearSurvey,
        reclasscouples$HHweight,
        reclasscouples$RaceHusband,
        numeric(nreclass),
        reclasscouples$EducHusband,
        numeric(nreclass),
        reclasscouples$AgeHusband,
        matrix(0, nreclass, 2),
        reclasscouples$MarriageStatus
      )
    singlewomenreclass <-
      data.frame(
        reclasscouples$YearSurvey,
        reclasscouples$HHweight,
        numeric(nreclass),
        reclasscouples$RaceWife,
        numeric(nreclass),
        reclasscouples$EducWife,
        numeric(nreclass),
        reclasscouples$AgeWife,
        numeric(nreclass),
        reclasscouples$MarriageStatus
      )
    dcouples <- dcouples[selagesmaxmarr, ]
    pstarline()
    cat( "Reclassified ",  nreclass,
         " couples (",  100 * mean(!selagesmaxmarr),
         "% of sample) with a partner above maximum age.\n" )
    cat("   (maximum ages=", husbmaxmarrage,  "for men and ",
        wifemaxmarrage, "for women.)\n")
    pstarline()
  }
  
  
  yearsurvey  <- dcouples$YearSurvey
  yearmarr <- dcouples$YearMarried
  agemarrhusb <- dcouples$AgeHusband - (yearsurvey - yearmarr)
  agemarrwife <- dcouples$AgeWife - (yearsurvey - yearmarr)
  
  ## sampling weights for couples
  fw <- dcouples$HHweight
  
  
  cat("\n\nAges at marriage:\n")
  fwplot <- fw / mean(fw)
  tages <- xtabs(fwplot ~ agemarrhusb + agemarrwife)
  pdf(paste0(resdir, "AgesAtMarriage", selectRace, younglim, ".pdf"))
  plot(tages,
       main = paste0("Ages at Marriage for ", raceadjs),
       xlab = "Husband",
       ylab = "Wife",
       col = "lightgreen")
  dev.off()
  
  
  diffOfAges <- agemarrhusb - agemarrwife
  birthhusb <- yearsurvey - dcouples$AgeHusband
  
  
  cat("\n Quantiles of age at marriage of husband:\n")
  pQuantiles(agemarrhusb)
  cat(" Quantiles of age at marriage of wife:\n")
  pQuantiles(agemarrwife)
  cat(" Quantiles of age difference at marriage:\n")
  pQuantiles(diffOfAges)
  earlycoh <- (birthhusb %in% (1940:1942))
  cat("  for early cohorts:\n")
  pQuantiles(diffOfAges[earlycoh])
  cat("  for late cohorts:\n")
  if (selectRace != 3) {
    latecoh <- (birthhusb %in% (1968:1970))
  } else {
    latecoh <- (birthhusb %in% (1963:1965))
  }
  pQuantiles(diffOfAges[latecoh])
  cat("\nProportions in the age difference cells for early cohorts:\n")
  cat("  <= -1:", mean(diffOfAges[earlycoh]  <= -1), "\n")
  for (idiff in 0:4) {
    cat("  ==", idiff,  mean(diffOfAges[earlycoh]  ==  idiff), "\n")
  }
  cat("  >= 5:", mean(diffOfAges[earlycoh]  >= 5), "\n")
  cat("\nProportions in the age difference cells for late cohorts:\n")
  cat("  <= -1:", mean(diffOfAges[latecoh]  <= -1), "\n")
  for (idiff in 0:4) {
    cat("  ==", idiff,  mean(diffOfAges[latecoh]  ==  idiff), "\n")
  }
  cat("  >= 5:", mean(diffOfAges[latecoh]  >= 5), "\n")
  
  
  ## histogram of age difference
  pdf(file = paste0(resdir, "histAgeDiff", selectRace, younglim, ".pdf"))
  hist(
    agemarrhusb - agemarrwife,
    breaks = 100,
    main = paste0("Age difference between Spouses for ", raceadj, "s"),
    xlab = "Age difference",
    col = 3,
    xlim = c(-15, 20)
  )
  abline(v = 0, lty = 2)
  dev.off()
  
  
  
  ## window for plots
  begplot <- 1940
  #endplot <- 200 - younglim
  endplot <- 1966
  cohplot <- seq(begplot, endplot)
  ncohplot <- endplot - begplot + 1
  cohinds <- cohplot - begplot + 1
  
  ## relative educations in couples
  educman <- dcouples$EducHusband
  educwoman <- dcouples$EducWife
  yearmarr <- dcouples$YearMarried
  yearsurvey <- dcouples$YearSurvey
  
  manmoreeduc <- numeric(ncohplot)
  manlesseduc <- numeric(ncohplot)
  maneqeduc <- numeric(ncohplot)
  
  for (iyear in 1:ncohplot) {
    eman <- educman[birthhusb == (begplot + iyear - 1)]
    ewoman <- educwoman[birthhusb == (begplot + iyear - 1)]
    manmoreeduc[iyear] <- mean(eman > ewoman)
    maneqeduc[iyear] <- mean(eman == ewoman)
    manlesseduc[iyear] <- mean(eman < ewoman)
  }
  
  
  ## plot
  pdf(file = paste0(resdir, "ComparEducsCouple",
                    selectRace,
                    younglim,
                    ".pdf"))
  plot(cohplot,
       manmoreeduc,
       type = "l",
       xlim = c(1940,1966),
       xlab = "Year of birth of husband",
       #main = paste("Comparing educations within", raceadj, "couples"),
       ylim = c(0, 0.8),
       ylab = "Proportion",
       lwd = 3)
  lines(cohplot, maneqeduc, lty = "dashed", col = 1, lwd = 3)
  lines(cohplot, manlesseduc, lty = "dotted", col = 1, lwd =3)
  legend("top",
         c( "Husband more educated",
            "Same education",
            "Husband less educated"
         ),
         col = rep(1,3),
         lty = seq(3)
  )
  dev.off()
  
  ## Computing proportion of both college graduate partners in a couple - added by Rossi
  
  # using husband birth year
  bothcd <- numeric(ncohplot)
  
  for (iyear in 1:ncohplot) {
    eman <- educman[birthhusb == (begplot + iyear - 1)]
    ewoman <- educwoman[birthhusb == (begplot + iyear - 1)]
    bothcd[iyear] <- sum(eman == 4 & ewoman == 4)/length(eman)
  }
  
  ## plot
  pdf(file = paste0(resdir, "PropBothPartnersCD",
                    selectRace,
                    younglim,
                    ".pdf"))
  plot(cohplot,
       bothcd,
       type = "l",
       xlab = "Year of birth of husband",
       xlim = c(1940, 1966),
       #main = paste("Proportion of ", raceadj, "couples with both partners college graduates"),
       ylim = c(0, 0.8),
       ylab = "Proportion",
       lwd=3)
  legend("top",
         c( "Husband and wife both CG"),
         col = seq(1),
         lty = rep(1, 1)
  )
  dev.off()
  
  
  
  ## now work on combined data
  
  constructData <- function(gender) {
    ## for singles: year, hhwt, race, educ, age, sex (male 1, female 2), marst
    selsingle <- ((dsingles$Gender == gender) &
                    (dsingles$Age <= oldlim) &
                    (dsingles$Age >= younglim))
    nsinglesel <- sum(selsingle)
    dsinglesel <- dsingles[selsingle, ]
    ## construct data for single  shaped like the data for couples
    if (gender == 1) {
      newdsingle <-
        data.frame(
          dsinglesel$YearSurvey,
          dsinglesel$HHweight,
          dsinglesel$Race,
          numeric(nsinglesel),
          dsinglesel$Education,
          numeric(nsinglesel),
          dsinglesel$Age,
          matrix(0, nsinglesel, 2),
          dsinglesel$MarriageStatus
        )
    }
    if (gender == 2) {
      newdsingle <-
        data.frame(
          dsinglesel$YearSurvey,
          dsinglesel$HHweight,
          numeric(nsinglesel),
          dsinglesel$Race,
          numeric(nsinglesel),
          dsinglesel$Education,
          numeric(nsinglesel),
          dsinglesel$Age,
          numeric(nsinglesel),
          dsinglesel$MarriageStatus
        )
    }
    names(newdsingle) <- varNames
    ## now combine datasets
    if (reclassOld == 0) {
      ## concatenate couples and single
      newdata <- rbind(dcouples, newdsingle)
    }
    if (reclassOld == 1 & gender == 1) {
      names(singlemenreclass) <- varNames
      ## concatenate couples and single men (including reclassified)
      newdata <- rbind(dcouples, newdsingle, singlemenreclass)
    }
    if (reclassOld == 1 & gender == 2) {
      names(singlewomenreclass) <- varNames
      ## concatenate couples and single women (including reclassified)
      newdata <- rbind(dcouples, newdsingle, singlewomenreclass)
    }
    
    ### we return
    newdata
  }
  
  
  mendata <- constructData(1)
  womendata <- constructData(2)
  
  
  
  ## for a given gender, compute the matching patterns and their sample variance
  
  probasEtAl <- function(gender) {
    if (gender == 1) {
      genderAge <- mendata$AgeHusband
      yearsurvey <- mendata$YearSurvey
      gendereduc <- mendata$EducHusband
      partnereduc <- mendata$EducWife
      genderdata <- mendata
      genderstr <- "men"
    }
    
    if (gender == 2) {
      genderAge <- womendata$AgeWife
      yearsurvey <- womendata$YearSurvey
      gendereduc <- womendata$EducWife
      partnereduc <- womendata$EducHusband
      genderdata <- womendata
      genderstr <- "women"
    }
    
    
    cohort <- yearsurvey - genderAge
    
    
    ##  weights
    Wtgender <- genderdata$HHweight
    
    
    ## range of cohorts
    begcohorts <- min(cohort)
    endcohorts <- max(cohort)
    ncohorts <- endcohorts - begcohorts + 1
    numcohorts <- numeric(ncohorts)
    for (icoh in (begcohorts:endcohorts)) {
      numcohorts[icoh - begcohorts + 1] <- sum(Wtgender[cohort == icoh])
    }
    cat( "\n For", raceadj, genderstr, ", we have cohorts " ,
         begcohorts, " to ",  endcohorts,".\n")
    
    probsgender <- array(0, c(ng, ncohorts, (1 + ng)))
    
    popugender <- sum(Wtgender)
    
    
    ngender <- matrix(0, ng, ncohorts)
    
    selcoh <- cohort %in% (1935 + 5 * (1:7))
    cohortsel <- cohort[selcoh]
    gendereducsel <-
      factor(ng + 1 - gendereduc[selcoh], labels = rev(smallnamesgroups))
    Wtgendersel <- Wtgender[selcoh]
    tsel <- xtabs(Wtgendersel ~ cohortsel + gendereducsel)
    pdf(file = paste0(resdir, "Educ",
                      genderstr, selectRace, younglim, ".pdf"))
    plot(
      tsel,
      col = T,
      xlab = "Cohort",
      ylab = "Education",
      main = paste("Education of", raceadj, genderstr)
    )
    dev.off()
    
    cohindgender <- cohinds - (begcohorts - begplot)
    ncoh <- numcohorts[cohindgender]
    kfac <- matrix(0, ng, ncoh)
    
    ## compute probas of matching various partners
    for (ig in 1:ng) {
      for (icoh in 1:ncohorts) {
        gcohcond <- ((cohort == (begcohorts + icoh - 1)) & (gendereduc == ig))
        Wtgsel <- Wtgender[gcohcond]
        ngendergc <- sum(Wtgsel)
        ngender[ig, icoh] <- ngendergc
        cat( "There are ",  ngendergc, genderstr,  "of group ", ig,
             " in cohort ",   (begcohorts + icoh - 1), "\n")
        if (ngendergc > 0) {
          n2gendergc <- sum(Wtgsel * Wtgsel)
          kfac[ig, icoh] <- n2gendergc / (ngendergc * ngendergc)
          partnereducsel <- partnereduc[gcohcond]
          for (ih in 1:(ng + 1)) {
            gcohcondh <- (partnereducsel == (ih - 1))
            if (sum(gcohcondh) > 0) {
              probsgender[ig, icoh, ih] <- sum(Wtgsel[gcohcondh]) / ngendergc
            } else {
              probsgender[ig, icoh, ih] <- 0
            }
          }
        } else {
          probsgender[ig, icoh, ] <- 0
        }
      }
    }
    
    
    
    ## probas of (educ*cohort)
    pgender <- ngender / popugender
    
    
    pedugender <- matrix(0, ng, ncohplot)
    for (ig in 1:ng) {
      pedugender[ig, ] <- ngender[ig, cohindgender] / ncoh
    }
    
    
    pdf(file = paste0(resdir, "PropEduc", genderstr, selectRace,  younglim, ".pdf"))
    plot(
      cohplot,
      pedugender[1, ],
      type = "l",
      main = paste("Education of", raceadj, genderstr, "by cohort"),
      xlab = "Year of birth",
      ylab = "Proportion",
      ylim = c(0,  max(pedugender) + 0.1)
    )
    for (ig in 2:ng) {
      lines(cohplot, pedugender[ig, ], col = ig)
    }
    legend("topright",
           smallnamesgroups,
           col = seq(ng),
           lty = rep(1, ng))
    dev.off()
    
    
    
    ## never married
    
    pdf(file = paste0(resdir,  "Prop",  genderstr, "NeverMarried", selectRace, younglim, ".pdf"))
    plot(
      cohplot,
      probsgender[1, cohindgender, 1],
      col = 1,
      type = "l",
      #main = paste("Proportion of", raceadj,   genderstr,
       #            "who never married by cohort"),
      xlab = "Year of birth",
      ylab = "Proportion",
      ylim = c(0, 1.1 * max(probsgender[, cohindgender, 1]))
    )
    for (ig in 2:ng) {
      lines(cohplot, probsgender[ig, cohindgender, 1], col = ig, lty = 1)
    }
    legend("topleft",
           smallnamesgroups,
           col = seq(ng),
           lty = rep(1, ng))
    dev.off()
    
    ## now compute variances of probas
    
    ##  first set NA probas to 0 (if any!)
    probsgender[is.na(probsgender)] <- 0
    
    varpgender <- array(0, c(ng, ncohorts, (1 + ng), (1 + ng)))
    
    ## estimated covariances
    for (icoh in 1:ncohorts) {
      for (ig in 1:ng) {
        if (ngender[ig, icoh] > 0) {
          varpgender[ig, icoh, , ] <-
            -(probsgender[ig, icoh, ] %o% probsgender[ig, icoh, ])
          for (ih in 1:(1 + ng)) {
            varpgender[ig, icoh, ih, ih] <- varpgender[ig, icoh, ih, ih] +
              probsgender[ig, icoh, ih]
          }
          varpgender[ig, icoh, , ] <-
            varpgender[ig, icoh, , ] * kfac[ig, icoh]
        }
      }
    }
    
    ## we return
    list(probsgender,
         ngender,
         varpgender,
         begcohorts,
         endcohorts,
         pedugender)
  }
  
  
  ## apply to men
  cpmen <- probasEtAl(1)
  probsmen <- cpmen[[1]]
  nmen <- cpmen[[2]]
  varpmen <- cpmen[[3]]
  begcohmen <- cpmen[[4]]
  endcohmen <- cpmen[[5]]
  pedumen <- cpmen[[6]]
  
  ## apply to women
  cpwomen <- probasEtAl(2)
  probswomen <- cpwomen[[1]]
  nwomen <- cpwomen[[2]]
  varpwomen <- cpwomen[[3]]
  begcohwomen <- cpwomen[[4]]
  endcohwomen <- cpwomen[[5]]
  peduwomen <- cpwomen[[6]]
  
  ## number of men and women by cohort
  numcohmen <- colSums(nmen)
  numcohwomen <- colSums(nwomen)
  
  
  pdf(file = paste0(resdir, "CohortNumbers", selectRace, younglim, ".pdf"))
  plot((begcohmen:endcohmen),
       numcohmen,
       type = "l",
       main = paste(raceadj, "men and women by cohort"),
       xlab = "Cohort",
       ylab = "Number",
       col = "blue",
       ylim = c(0, 1.1 * max((
         c(numcohmen, numcohwomen)
       )))
  )
  lines((begcohwomen:endcohwomen), numcohwomen, col = "red")
  legend("bottom",
         c("Men", "Women"),
         col = c("blue", "red"),
         lty = rep(1, 2))
  dev.off()
  
  
  
  ## proportion of men by education level and cohort
  
  propmen <- numcohmen[cohplot - begcohmen] /
    (numcohmen[cohplot - begcohmen] + numcohwomen[cohplot - begcohwomen])
  propmeneduc <- nmen[, (cohplot - begcohmen)] /
    (nmen[, (cohplot - begcohmen)] + nwomen[, (cohplot - begcohwomen)])
  
  pdf(file = paste0(resdir, "PropofMen", selectRace, younglim, ".pdf"))
  plot(cohplot,
       propmeneduc[1, ],
       type = "l",
       main = paste0("Proportion of men in each (cohort*education) for ",
                     raceadj, "s"),
       xlab = "Year of birth",
       ylab = "Proportion",
       ylim = c(0.2, 0.75)
  )
  for (ig in 2:ng) {
    lines(cohplot, propmeneduc[ig, ], col = ig)
  }
  lines(cohplot, propmen, col = 8)
  abline(h = 0.5, lty = 2, col = 1)
  legend(
    "topright",
    c(smallnamesgroups, "All"),
    col = c(seq(ng), 8),
    lty = rep(1, ng + 1)
  )
  dev.off()
  
  
  ## numbers of men and women in each cohort
  ncohmenden <- numcohmen[cohplot - begcohmen]
  ncohwomenden <- numcohwomen[cohplot - begcohwomen]
  pdf(file = paste0(resdir, "CohortNumbers", selectRace, younglim, ".pdf"))
  plot(
    cohplot,
    ncohmenden,
    type = "l",
    xlab = "Year of birth",
    ylab = "Number",
    main = paste0("Number of observations in each cohort for ", raceadj, "s"),
    ylim = c(0, 1.1 * max(c(
      ncohmenden, ncohwomenden
    ))),
    col = "blue"
  )
  lines(cohplot, ncohwomenden, col = "red")
  legend("topleft",
         c("Men", "Women"),
         col = c("blue", "red"),
         lty = rep(1, 2))
  dev.off()
  
  
  ## plot marriage patterns
  
  plotMarrPatterns <- function(gender, donorm) {
    if (gender == 1) {
      genderAge <- mendata$AgeHusband
      yearsurvey <- mendata$YearSurvey
      gendereduc <- mendata$EducHusband
      partnereduc <- mendata$EducWife
      genderdata <- mendata
      genderstr <- "men"
      partnerstr <- "women"
    }
    
    if (gender == 2) {
      genderAge <- womendata$AgeWife
      yearsurvey <- womendata$YearSurvey
      gendereduc <- womendata$EducWife
      partnereduc <- womendata$EducHusband
      genderdata <- womendata
      genderstr <- "women"
      partnerstr <- "men"
    }
    
    
    cohort <- yearsurvey - genderAge
    
    ##  weights
    Wtgender <- genderdata$HHweight
    
    
    intv <- 3   ## take intv cohorts
    ## oldest person
    selold <- (cohort %in% cohplot[1:intv])
    ## youngest person
    selyoung <- (cohort %in% rev(cohplot)[intv:1])
    selthem <-
      selyoung - selold  ## 1 for young, -1 for old, else 0
    ## their education and their partners' (if any)
    Gsel <- Wtgender[abs(selthem) == 1]
    Geducsel <- gendereduc[abs(selthem) == 1]
    Peducsel <- partnereduc[abs(selthem) == 1]
    selsel <- selthem[abs(selthem) == 1]
    
    
    tx <- xtabs(Gsel ~ Geducsel + Peducsel + selsel)
    dtx <- array(0, c(ng, ng, 2))
    
    if (donorm) {
      ## only those who marry
      dtx[, , 1] <- tx[, (2:(ng + 1)), 1] / rowSums(tx[, 2:(ng + 1), 1])
      dtx[, , 2] <- tx[, (2:(ng + 1)), 2] / rowSums(tx[, 2:(ng + 1), 2])
      normstr <- "norm"
      normstr2 <- "who marry"
    } else {
      dtx[, , 1] <- tx[, (2:(ng + 1)), 1] / rowSums(tx[, , 1])
      dtx[, , 2] <- tx[, (2:(ng + 1)), 2] / rowSums(tx[, , 2])
      normstr <- ""
      normstr2 <- ""
    }
    
    n2 <- ng * ng * 2
    Gfac <- numeric(n2)
    Pfac <- numeric(n2)
    AgeG <- numeric(n2)
    Freq <- numeric(n2)
    i <- 1
    for (ig in 1:ng) {
      for (ih in 1:ng) {
        for (ia in 1:2) {
          Pfac[i] <- ih
          Gfac[i] <- ig
          AgeG[i] <- ia
          Freq[i] <- dtx[ig, ih, ia]
          i <- i + 1
        }
      }
    }
    fP <- factor(Pfac)
    levels(fP) <-
      c(paste(partnerstr, ": HSD", sep = ''), smallnamesgroups[-1])
    fG <- factor(Gfac)
    levels(fG) <-
      paste(paste(genderstr, ":", sep = ''), smallnamesgroups)
    fAgeG <- factor(AgeG)
    oldcoh <- paste(cohplot[1], "-", cohplot[3], sep = '')
    youngcoh <-
      paste(rev(cohplot)[3], "-", rev(cohplot)[1], sep = '')
    levels(fAgeG) <- paste("Born", c(oldcoh, youngcoh))
    pdf(file = paste0(resdir, "Marriage",  genderstr,  normstr, selectRace,
                      younglim, ".pdf"))
    print(
      barchart(
        fG ~ Freq | fAgeG,
        groups = fP,
        stack = T,
        key = list(
          title = paste("Marriage patterns of", raceadj, genderstr, normstr2),
          columns = ng,
          text = list(smallnamesgroups),
          rectangles = list(
            size = 2,
            border = "black",
            col = c("gray", (2:ng))
          )
        ),
        xlab = "Proportion",
        col = c("gray", (2:ng))
      )
    )
    dev.off()
    
    
  }
  
  plotMarrPatterns(1, F)
  plotMarrPatterns(1, T)
  plotMarrPatterns(2, F)
  plotMarrPatterns(2, T)
  
  
  ## education levels of men and women
  namesgroups2 <- c(rbind(
    paste(smallnamesgroups, "", "Men"),
    paste(smallnamesgroups, "", "Women")
  ))
  pdf(file = paste0(resdir, "PropEducsMenWomen", selectRace, younglim,  ".pdf"))
  plot(
    cohplot,
    pedumen[1, ],
    col = 1,
    type = "l",
    #main = paste("Education of", raceadj, "men and women by cohort"),
    xlab = "Year of birth",
    xlim = c(1940,1966),
    ylab = "Proportion",
    ylim = c(0, 0.8),
    lty = 4,
    lwd = 2
  )
  lines(cohplot, peduwomen[1, ], col = "darkgrey", lty = 4, lwd = 2)

    lines(cohplot, pedumen[2, ], col = 1, lty = 5, lwd = 2)
    lines(cohplot, peduwomen[2, ], col = "darkgrey", lty = 5, lwd = 2)
    
    lines(cohplot, pedumen[3, ], col = 1, lty = 3, lwd = 2)
    lines(cohplot, peduwomen[3, ], col = "darkgrey", lty = 3, lwd = 2)
    
    lines(cohplot, pedumen[4, ], col = 1, lty = 1, lwd = 2)
    lines(cohplot, peduwomen[4, ], col = "darkgrey", lty = 1, lwd = 2)

  legend(
    "top",
    namesgroups2,
    col = c(rbind(1,"darkgrey", 1,"darkgrey", 1,"darkgrey",1,"darkgrey")),
    lty = c(4,4,5,5,3,3,1,1),
    cex = 0.8
  )
  dev.off()
  
  
  save(probsmen,
       probswomen,
       varpmen,
       varpwomen,
       nmen,
       nwomen,
       cohinds,
       cohplot,
       varNames,
       namesgroups,
       smallnamesgroups,
       begcohmen,
       begcohwomen,
       endcohmen,
       endcohwomen,
       younglim,
       oldlim,
       husbminmarrage,
       wifeminmarrage,
       husbmaxmarrage,
       wifemaxmarrage,
       numcohmen,
       numcohwomen,
       mendata,
       womendata,
       file = paste0("output_data/", "probsnonpar", selectRace,   younglim, "_4educgrp", ".RData"))
  
  if (sinkit) {
    sink()
  }
}
